from transformers import AutoTokenizer, AutoModel
import time
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True).half().cuda()
model = model.eval()

import gradio as gr
def greet2(name):
    response, history = model.chat(tokenizer, name, history=[])    # print(response)
    return response

def alternatingly_agree(message, history):
   return greet2(message)

gr.ChatInterface(alternatingly_agree).launch()

